import pandas as pd
pd.options.mode.chained_assignment = None
pd.options.display.max_rows=100
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split,ParameterGrid,StratifiedKFold,GridSearchCV,RandomizedSearchCV
from sklearn.metrics import roc_auc_score,log_loss
from sklearn.manifold import TSNE
import plotly.express as px
import plotly
import xgboost as xgb
plotly.offline.init_notebook_mode()
from skopt import BayesSearchCV
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from joblib import dump, load
import os
import pickle
import plotly.offline as pyo
!mkdir -p Imputer
!mkdir -p ../cnn
!mkdir -p ../dnn
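The same directories can also be created from Python, which keeps the notebook shell-independent (a small alternative sketch):
# Create the working directories if they are missing
for d in ("Imputer", "../cnn", "../dnn"):
    os.makedirs(d, exist_ok=True)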
df = pd.read_csv("../data/train.csv")
df_test = pd.read_csv("../data/test.csv")
y = df.y
df.shape
(1095, 1614)
df.head()
| | sample_id | f0 | f1 | f2 | f3 | f4 | f5 | f6 | f7 | f8 | ... | f1603 | f1604 | f1605 | f1606 | f1607 | f1608 | f1609 | f1610 | f1611 | y |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | sample_0 | 25.609375 | 6.703125 | 3.652344 | 10.039062 | 169.375 | 102.8125 | 1.422852 | 6.722656 | 8.015625 | ... | 8.070312 | 4.363281 | 5.019531 | 5.710938 | 6.343750 | 6.843750 | 7.289062 | 7.617188 | 7.980469 | 1.0 |
| 1 | sample_1 | 18.343750 | 5.824219 | 2.966797 | 4.902344 | 164.625 | 71.8125 | 1.357422 | 5.894531 | 2.753906 | ... | 7.359375 | 4.195312 | 4.808594 | 5.425781 | 5.949219 | 6.339844 | 6.730469 | 7.074219 | 7.175781 | 1.0 |
| 2 | sample_2 | 28.562500 | 6.230469 | 3.583984 | 7.882812 | 159.500 | 113.1875 | 1.696289 | 6.316406 | 4.605469 | ... | 8.562500 | 4.523438 | 5.097656 | 5.789062 | 6.457031 | 6.871094 | 7.386719 | 7.878906 | 8.328125 | 1.0 |
| 3 | sample_3 | 28.062500 | 6.132812 | 2.726562 | 6.378906 | 169.750 | 111.0000 | 1.535156 | 6.199219 | 3.712891 | ... | 4.558594 | 3.533203 | 3.900391 | 4.261719 | 4.042969 | 3.869141 | 3.890625 | 4.042969 | 4.273438 | 1.0 |
| 4 | sample_4 | 20.109375 | 6.144531 | 3.203125 | 6.035156 | 164.750 | 78.8750 | 1.281250 | 6.187500 | 4.003906 | ... | 6.613281 | 4.625000 | 4.996094 | 5.328125 | 5.593750 | 5.800781 | 6.027344 | 6.242188 | 6.449219 | 0.0 |
5 rows × 1614 columns
df.describe()
| | f0 | f1 | f2 | f3 | f4 | f5 | f6 | f7 | f8 | f9 | ... | f1603 | f1604 | f1605 | f1606 | f1607 | f1608 | f1609 | f1610 | f1611 | y |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | ... | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 | 1095.000000 |
| mean | 27.888784 | 6.307381 | 3.227237 | 7.552864 | 164.607648 | 114.376398 | 1.496589 | 6.370684 | 5.343339 | 7.842965 | ... | 5.889956 | 4.042059 | 4.502239 | 4.966580 | 5.371013 | 5.514062 | 5.709822 | 5.868557 | 5.932324 | 0.612785 |
| std | 20.639422 | 0.453696 | 0.420215 | 2.447590 | 4.183401 | 113.221960 | 0.261378 | 0.434857 | 2.414132 | 0.407226 | ... | 2.198822 | 0.472686 | 0.608721 | 0.759738 | 0.930937 | 1.135693 | 1.372481 | 1.590901 | 1.861157 | 0.487336 |
| min | 13.351562 | 5.464844 | 1.333008 | 1.700195 | 152.125000 | 51.500000 | 0.919434 | 5.527344 | 1.461914 | 7.117188 | ... | 0.000000 | 0.693359 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 20.609375 | 6.003906 | 2.925781 | 5.779297 | 162.062500 | 81.093750 | 1.363281 | 6.070312 | 3.648438 | 7.574219 | ... | 5.097656 | 3.794922 | 4.234375 | 4.628906 | 4.980469 | 5.035156 | 5.125000 | 5.164062 | 5.128906 | 0.000000 |
| 50% | 23.984375 | 6.218750 | 3.222656 | 7.242188 | 164.250000 | 95.062500 | 1.450195 | 6.292969 | 4.867188 | 7.765625 | ... | 6.347656 | 4.085938 | 4.589844 | 5.093750 | 5.542969 | 5.695312 | 5.929688 | 6.136719 | 6.265625 | 1.000000 |
| 75% | 29.320312 | 6.570312 | 3.537109 | 8.980469 | 166.437500 | 116.906250 | 1.574707 | 6.625000 | 6.572266 | 8.070312 | ... | 7.404297 | 4.335938 | 4.890625 | 5.421875 | 5.929688 | 6.230469 | 6.607422 | 6.935547 | 7.189453 | 1.000000 |
| max | 328.000000 | 9.070312 | 4.847656 | 21.406250 | 184.875000 | 1833.000000 | 4.468750 | 8.898438 | 19.890625 | 10.382812 | ... | 11.578125 | 5.230469 | 6.003906 | 6.914062 | 7.800781 | 8.601562 | 9.398438 | 10.179688 | 10.929688 | 1.000000 |
8 rows × 1613 columns
df.drop(['sample_id'], inplace=True, axis=1)
df.drop(['y'], inplace=True, axis=1)
test_idx = df_test['sample_id']
df_test.drop(['sample_id'],inplace=True,axis=1)
Let's take a closer look at our data.
print(f"Dataset has {df.shape[1]} features")
print(f"Train dataset contains {df.shape[0]} objects")
print(f"Kaggle test dataset contains {df_test.shape[0]} objects")
Dataset has 1612 features
Train dataset contains 1095 objects
Kaggle test dataset contains 194 objects
Find all NaN and inf values in our data.
All inf values most likely carry the same meaning as NaN, so let's replace every inf with NaN.
# Replace inf with NaN, then count NaNs per column
df.replace([np.inf, -np.inf,pd.NA], np.nan,inplace=True)
df_test.replace([np.inf, -np.inf,pd.NA], np.nan,inplace=True)
sort_df = df.isna().sum().sort_values(ascending=False)
Print the number of NaNs for each feature; the values are sorted in descending order.
sort_df[sort_df>0]
f805 1095
f846 1095
f835 1095
f839 1095
f842 1095
...
f158 2
f671 2
f500 1
f651 1
f267 1
Length: 440, dtype: int64
Several features contain 1095 NaNs, which means they are NaN for ALL 1095 objects!
Let's count these empty features.
print(f"Dataset contains {sort_df[sort_df==df.shape[0]].count()} empty features")
Dataset contains 72 empty features
We will remove them later.
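These fully empty columns can also be found directly, without the sorted counts (a one-line pandas check):
# Columns that are NaN for every row of the training set
empty_cols = df.columns[df.isna().all()]
print(len(empty_cols))  # 72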
Let's plot the NaN distribution for the features that contain NaNs.
not_null_nans = sort_df[(sort_df>1) & (sort_df<1095)]
fig = px.histogram(pd.DataFrame({"Features":not_null_nans}),marginal="box")
fig.update_layout(
title="NaN's distribution",
xaxis_title="NaN's in the feature",
yaxis_title="Count",
showlegend = False,
font=dict(
size=10,
)
)
fig.show()
75% of the features that contain NaNs have 250 or fewer missing values.
The median is 21 NaNs.
We COULD simply remove every feature with more than 579 NaNs, but instead we will turn such features into categorical indicators: 1 if the value is present and 0 if it is NaN (see the toy sketch below).
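A minimal sketch of this binarization on a toy frame (illustrative only; the actual helper used below works the same way):
toy = pd.DataFrame({"f_a": [3.1, np.nan, 0.5], "f_b": [np.nan, np.nan, 2.0]})
# NaN > 0 evaluates to False, so missing values become 0 and present positive values become 1
toy[["f_a", "f_b"]] = np.where(toy[["f_a", "f_b"]] > 0, 1, 0)
print(toy)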
Let's test at what percentage of missing data the baseline RandomForest model shows the best result.
def save_imputer(imputer, n_features, nans):
    dump(imputer, f'Imputer/imputer_{n_features}_{nans}.joblib')

def dropped_values_to_categorical(df, dropped_columns, drop_1095):
    """Turn mostly-NaN columns into binary presence indicators and drop fully empty columns."""
    # inf values were already replaced with NaN by the caller;
    # NaN > 0 evaluates to False, so missing values map to 0 and present positive values to 1
    df[dropped_columns] = np.where(df[dropped_columns] > 0, 1, 0)
    # Drop the features that are NaN for every object
    df = df.drop(drop_1095, axis=1)
    return df
sort_df_test_set = df.isna().sum().sort_values(ascending=False)
drop_1095 = sort_df_test_set[sort_df_test_set==df.shape[0]].index
n_nearest_features = [5, 10, 50]
NaN_percent = [0.1, 0.5, 0.8]  # Fraction of NaNs above which a column is binarized: 0.5 means columns with more than 50% missing values become indicators
best_auc = -1
best_params = dict()
for n_features in n_nearest_features:
    for idx, nans in enumerate(NaN_percent):
        df_skip_data_test = df.copy()
        # Columns whose NaN share exceeds the current threshold get binarized
        dropped_columns = sort_df[sort_df > df_skip_data_test.shape[0] * nans].index
        df_skip_data_test = dropped_values_to_categorical(df_skip_data_test, dropped_columns, drop_1095)
        impute_object = IterativeImputer(random_state=42, n_nearest_features=n_features)
        result_impute = impute_object.fit_transform(df_skip_data_test)
        result_df = pd.DataFrame(result_impute, columns=df_skip_data_test.columns)
        n_splits = 3
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        auc_roc = 0
        for train_index, test_index in skf.split(result_df, y):
            X_train, y_train = result_df.iloc[train_index], y.iloc[train_index]
            X_test, y_test = result_df.iloc[test_index], y.iloc[test_index]
            rfc = RandomForestClassifier(random_state=42)
            rfc.fit(X_train, y_train)
            result_rfc = rfc.predict_proba(X_test)
            auc_roc += roc_auc_score(y_test, result_rfc[:, 1])
        auc_roc /= n_splits
        if auc_roc > best_auc:
            best_params['n_nearest_features'] = n_features
            best_params['NaN percent'] = nans
            best_auc = auc_roc
        print("------------------------------------------------------------------------")
        print(f"{idx} | N features: {n_features} | Percent: {nans} | Best AUC ROC: {best_auc:0.5f} | Best params: {best_params}")
        save_imputer(impute_object, n_features, nans)
/home/nelubin/anaconda3/envs/ExpaSoft/lib/python3.6/site-packages/sklearn/impute/_iterative.py:670: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached. (repeated for every run)
------------------------------------------------------------------------
0 | N features: 5 | Percent: 0.1 | Best AUC ROC: 0.85072 | Best params: {'n_nearest_features': 5, 'NaN percent': 0.1}
------------------------------------------------------------------------
1 | N features: 5 | Percent: 0.5 | Best AUC ROC: 0.85072 | Best params: {'n_nearest_features': 5, 'NaN percent': 0.1}
------------------------------------------------------------------------
2 | N features: 5 | Percent: 0.8 | Best AUC ROC: 0.85072 | Best params: {'n_nearest_features': 5, 'NaN percent': 0.1}
------------------------------------------------------------------------
0 | N features: 10 | Percent: 0.1 | Best AUC ROC: 0.85105 | Best params: {'n_nearest_features': 10, 'NaN percent': 0.1}
------------------------------------------------------------------------
1 | N features: 10 | Percent: 0.5 | Best AUC ROC: 0.85105 | Best params: {'n_nearest_features': 10, 'NaN percent': 0.1}
------------------------------------------------------------------------
2 | N features: 10 | Percent: 0.8 | Best AUC ROC: 0.85105 | Best params: {'n_nearest_features': 10, 'NaN percent': 0.1}
------------------------------------------------------------------------
0 | N features: 50 | Percent: 0.1 | Best AUC ROC: 0.85105 | Best params: {'n_nearest_features': 10, 'NaN percent': 0.1}
------------------------------------------------------------------------
1 | N features: 50 | Percent: 0.5 | Best AUC ROC: 0.85150 | Best params: {'n_nearest_features': 50, 'NaN percent': 0.5}
------------------------------------------------------------------------
2 | N features: 50 | Percent: 0.8 | Best AUC ROC: 0.85289 | Best params: {'n_nearest_features': 50, 'NaN percent': 0.8}
print(f"Best AUC ROC: {best_auc} | Best params {best_params}")
print(f"576 is {579/df.shape[0]*100:2.0f}% from {df.shape[0]}")
Best AUC ROC: 0.8528890645151533 | Best params {'n_nearest_features': 50, 'NaN percent': 0.8}
576 is 53% from 1095
In practice the previously selected threshold (579 NaNs ≈ 53% ≈ 50%) already works well, but the experiment shows that a higher threshold (80% NaNs) gives a slightly better score, so from here on we binarize only the columns with more than 80% missing values. We will also use the 50 nearest features to impute the remaining NaNs.
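For reference, the selected configuration corresponds to this imputer (the same kind of object that was saved to Imputer/ during the search):
# Imputer chosen by the experiment: 50 nearest features for each column being imputed
impute_object = IterativeImputer(random_state=42, n_nearest_features=50)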
fig = px.bar(x=[0,1],y=[y[y==0].shape[0],y[y==1].shape[0]])
fig.update_layout(xaxis_title="Class",
yaxis_title="Count of objects in class",
showlegend = False)
fig.show()
print(f"{y[y==0].shape[0]/y.shape[0]*100:2.0f}% objects in sample belongs to 0 class")
39% objects in sample belongs to 0 class
The classes are relatively balanced.
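The same balance can be read off numerically with a one-line pandas check:
# Class shares: roughly 0.61 for class 1 and 0.39 for class 0
print(y.value_counts(normalize=True).round(2))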
Let's lock in this result by adding a function that returns the dataset cleared of NaNs.
def get_imputer(percent_of_nan, n_features, path="Imputer/"):
    # Reuse a previously fitted imputer whose file name matches the settings
    # (note: this substring match is loose, e.g. "5" also matches "50")
    for file in os.listdir(path):
        if str(percent_of_nan) in file and str(n_features) in file:
            impute_object = load(path + file)
            return impute_object
    # Otherwise return a fresh, unfitted imputer
    return IterativeImputer(random_state=42, max_iter=10, n_nearest_features=n_features)
def drop_constant_columns(df,constant_columns):
df.drop(constant_columns,axis=1,inplace=True)
return df
def get_data_wo_nan(train_path="../data/train.csv", test_path="../data/test.csv", percent_of_nan=0.8, n_features=100, drop_values=False):
    """Replace all NaNs and return TrainDataFrame, TrainLabels, KaggleTestData, KaggleTestID"""
    df = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)
    y = df.y
    df.drop(['sample_id'], inplace=True, axis=1)
    df.drop(['y'], inplace=True, axis=1)
    test_idx = df_test['sample_id']
    df_test.drop(['sample_id'], inplace=True, axis=1)
    df.replace([np.inf, -np.inf, pd.NA], np.nan, inplace=True)
    df_test.replace([np.inf, -np.inf, pd.NA], np.nan, inplace=True)
    sort_df = df.isna().sum().sort_values(ascending=False)
    columns_with_nan = sort_df[sort_df > df.shape[0] * percent_of_nan].index
    dropped_columns = sort_df[sort_df == df.shape[0]].index
    # Either drop the mostly-NaN columns outright, or binarize them into presence indicators
    if drop_values:
        df_train_dropped = df.drop(columns_with_nan, axis=1)
        df_test_dropped = df_test.drop(columns_with_nan, axis=1)
    else:
        df_train_dropped = dropped_values_to_categorical(df, columns_with_nan, dropped_columns)
        df_test_dropped = dropped_values_to_categorical(df_test, columns_with_nan, dropped_columns)
    impute_object = get_imputer(percent_of_nan, n_features)
    try:
        # Succeeds only if get_imputer returned an already fitted imputer
        result_dataset = impute_object.transform(df_train_dropped)
        print(df_test_dropped.shape)
        result_dataset_test = impute_object.transform(df_test_dropped)
    except Exception:
        print("Fit the imputer")
        result_dataset = impute_object.fit_transform(df_train_dropped)
        result_dataset_test = impute_object.transform(df_test_dropped)
        save_imputer(impute_object, n_features, percent_of_nan)
    print(f"DataSet has {result_dataset.shape[1]} features")
    return result_dataset, y, result_dataset_test, test_idx
result_dataset, y, _, _ = get_data_wo_nan(drop_values=True)
(194, 1481)
DataSet has 1481 features
Let's plot the data and try to describe it. For this task we will use T-SNE. This method lets us show the distance between classes in a 2D plot (our preprocessed data has 1481 dimensions).
def plot_tsne(dataset,y):
"""Plot T-SNE graph"""
tsne = TSNE(random_state=42)
result_tsne = tsne.fit_transform(dataset)
df_ed = pd.DataFrame(data={"Feature X 1":result_tsne[:,0],"Feature X 2":result_tsne[:,1],"y":y})
fig = px.scatter(df_ed,x="Feature X 1",y="Feature X 2",color="y")
fig.show()
print(f"Kullback–Leibler divergence {tsne.kl_divergence_:0.2f}")
plot_tsne(result_dataset,y)
Kullback–Leibler divergence 0.67
The plot shows regions where one class dominates, but overall the data is very hard to separate with hyperplanes.
Perhaps some of the points are outliers with many missing values. Let's test this hypothesis by lowering the NaN threshold to 10%.
dataset_01, y_01, _, _ = get_data_wo_nan(percent_of_nan=0.1)
plot_tsne(dataset_01,y_01)
(194, 1540)
DataSet has 1540 features
Kullback–Leibler divergence 0.67
The result is almost unchanged and the data stays mixed, so we keep the 80% threshold chosen earlier.
1) We have a dataset with more features (1540) than objects (1095)
2) The data is strongly mixed
3) We have 2 classes with a roughly 60/40 distribution
Let's try to find a model that describes our data better.
def print_metrics(true_y, predicted):
    """Print ROC AUC metric"""
    roc_auc = roc_auc_score(true_y, predicted)
    print(f"ROC AUC {roc_auc:0.5f}")
def save_model_parameters(parameter, model_name):
    """Save sklearn model parameters"""
    with open(f"{model_name}_parameters.pkl", "wb") as file:
        pickle.dump(parameter, file)
def load_model_parameters(model_name):
    """Load sklearn model parameters"""
    with open(f"{model_name}_parameters.pkl", "rb") as file:
        parameters = pickle.load(file)
    return parameters
result_dataset, labels, result_dataset_test, kaggle_test_idx = get_data_wo_nan(percent_of_nan=0.8)
(194, 1540)
DataSet has 1540 features
Let's test our default RandomForest baseline model.
# Split the data into train and test, 80/20
X_train, X_test, y_train, y_test = train_test_split(result_dataset, labels, test_size=0.2, random_state=42, shuffle=True)
rfc = RandomForestClassifier(random_state=42)
# Train the baseline model on the training split
rfc.fit(X_train, y_train)
result_proba = rfc.predict_proba(X_test)[:,1]
print_metrics(y_test,result_proba)
ROC AUC 0.89487
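A single 80/20 split can be optimistic or pessimistic. As a sanity check, a cross-validated estimate of the same baseline could look like this (a sketch using sklearn's cross_val_score; it was not part of the original run):
from sklearn.model_selection import cross_val_score
# 5-fold cross-validated ROC AUC for the untuned baseline
cv_scores = cross_val_score(RandomForestClassifier(random_state=42),
                            result_dataset, labels, cv=5, scoring="roc_auc")
print(f"CV ROC AUC {cv_scores.mean():0.5f} +/- {cv_scores.std():0.5f}")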
Let's submit this baseline to Kaggle.
def kaggle_write(result, index, file_name="Answer.csv"):
    """Write the result to csv"""
    pd.DataFrame({"y": result}, index=index).to_csv(file_name, index=True, index_label="sample_id")
result_proba = rfc.predict_proba(result_dataset_test)[:,1]
kaggle_write(result_proba,kaggle_test_idx)
Result on kaggle public test: 0.86341
We can improve on this baseline result with hyperparameter fine-tuning.
parameters = {'n_estimators' : [ 500, 1000, 1200, 1400, 1500,1600,1800,2000,2500,3000,4000],
'max_depth': [5, 10, 20, 40, 50,60, 70, 80, 100, 120],
'min_samples_split': [2, 4, 8, 10],
'min_samples_leaf': [2, 4, 8, 10],
'min_impurity_decrease': [0, 1e-6, 1e-4, 1e-2,1e-5],
'bootstrap': [False, True]}
rfc = RandomForestClassifier()
bscv = BayesSearchCV(rfc,parameters,n_jobs=-1,n_iter=120,verbose=1,random_state=42)
bscv.fit(result_dataset,y)
Fitting 5 folds for each of 1 candidates, totalling 5 fits (repeated for each of the 120 search iterations)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers. [Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 11.4s finished (similar lines repeated for every iteration)
bscv.best_params_
rfc = RandomForestClassifier(random_state=42)
rfc.set_params(**bscv.best_params_)
rfc.fit(X_train,y_train)
result_rfc = rfc.predict_proba(X_test)[:,1]
print_metrics(y_test,result_rfc)
result_proba = rfc.predict_proba(result_dataset_test)[:,1]
kaggle_write(result_proba,kaggle_test_idx,file_name="RFC_Answer.csv")
save_model_parameters(bscv.best_params_,type(rfc).__name__+"_2")
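The saved parameters can later be restored without re-running the search (a short sketch; the file name matches the save_model_parameters call above):
# Rebuild the tuned model from the pickled parameters
best_rfc_params = load_model_parameters("RandomForestClassifier_2")
rfc = RandomForestClassifier(random_state=42, **best_rfc_params)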
Let's try to find similar tasks on the internet.
This kaggle competition is very similar to our challenge.
https://www.kaggle.com/c/lish-moa
I found two winning solutions:
1) https://www.kaggle.com/kokitanisaka/moa-ensemble#ResNet
2) https://www.kaggle.com/kento1993/nn-svm-tabnet-xgb-with-pca-cnn-stacking-without-pp
They used a dense neural network, a CNN and XGBoost. Let's try the same.
xb = xgb.XGBClassifier()
param_grid = {'booster': ['gbtree', 'gblinear', 'dart'],
'learning_rate': [1e-3, 1e-2, 5e-2, 1e-1],
'gamma': [1e-4,1e-3,1e-2, 1e-1, 1, 2],
'n_estimators': [400,500, 600, 1000, 1500],
'max_depth': [4,5, 10, 20,50],
'min_child_weight': [1e-4, 1e-2, 1e-1],
'max_delta_step': [1e-6, 1e-4, 1e-2],
'subsample': [0.2, 0.5,0.7],
'reg_lambda': [1e-6, 1e-4, 1e-2, 1e-1]}
gs = BayesSearchCV(xb, param_grid,n_iter=200, n_jobs=-1,verbose=1)
gs.fit(X_train, y_train)
Fitting 5 folds for each of 1 candidates, totalling 5 fits (repeated for each of the 200 search iterations)
[16:23:52] WARNING: ../src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers. [Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 11.7s finished (similar lines repeated for every iteration)
/home/nelubin/anaconda3/envs/ExpaSoft/lib/python3.6/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before. (repeated several times)
/home/nelubin/anaconda3/envs/ExpaSoft/lib/python3.6/site-packages/xgboost/sklearn.py:888: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].
BayesSearchCV(estimator=XGBClassifier(base_score=None, booster=None,
colsample_bylevel=None,
colsample_bynode=None,
colsample_bytree=None, gamma=None,
gpu_id=None, importance_type='gain',
interaction_constraints=None,
learning_rate=None, max_delta_step=None,
max_depth=None, min_child_weight=None,
missing=nan, monotone_constraints=None,
n_estimators=100, n_jobs=Non...
n_iter=200, n_jobs=-1,
search_spaces={'booster': ['gbtree', 'gblinear', 'dart'],
'gamma': [0.0001, 0.001, 0.01, 0.1, 1, 2],
'learning_rate': [0.001, 0.01, 0.05, 0.1],
'max_delta_step': [1e-06, 0.0001, 0.01],
'max_depth': [4, 5, 10, 20, 50],
'min_child_weight': [0.0001, 0.01, 0.1],
'n_estimators': [400, 500, 600, 1000, 1500],
'reg_lambda': [1e-06, 0.0001, 0.01, 0.1],
'subsample': [0.2, 0.5, 0.7]},
verbose=1)
X_train, X_test, y_train, y_test = train_test_split(result_dataset, labels, test_size=0.2, random_state=42, shuffle=True)
print(f'Best parameters: {gs.best_params_}')
#refit the best parameters on the train split to get a holdout metric
xb = xgb.XGBClassifier(**gs.best_params_)
xb.fit(X_train, y_train)
result_xb = xb.predict(X_test)
print_metrics(y_test, result_xb)
#use the CV-refit search object for the kaggle predictions
result_proba = gs.predict_proba(result_dataset_test)[:, 1]
kaggle_write(result_proba, kaggle_test_idx, file_name="XGB_Answer.csv")
Best parameters: OrderedDict([('booster', 'dart'), ('gamma', 0.0001), ('learning_rate', 0.001), ('max_delta_step', 0.01), ('max_depth', 10), ('min_child_weight', 0.1), ('n_estimators', 1000), ('reg_lambda', 0.01), ('subsample', 0.5)])
[09:11:29] WARNING: ../src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
ROC AUC 0.77328
/home/nelubin/anaconda3/envs/ExpaSoft/lib/python3.6/site-packages/xgboost/sklearn.py:888: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].
#Save model parameters
save_model_parameters(gs.best_params_,type(xb).__name__+"_2")
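The label-encoder and eval-metric warnings printed above can be silenced by being explicit in the constructor. A minimal sketch (my addition, not rerun here; it reuses gs.best_params_ from this notebook):

xb = xgb.XGBClassifier(**gs.best_params_,
                       use_label_encoder=False,   # labels are cast to 0/1 integers below
                       eval_metric='logloss')     # the new default metric, set explicitly
xb.fit(X_train, y_train.astype(int))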
#CNN idea from Mechanisms of Action prediction https://www.kaggle.com/kento1993/nn-svm-tabnet-xgb-with-pca-cnn-stacking-without-pp#Stage-2:-NN
class CNN(nn.Module):
    """Treat the 1024-dim encoded feature vector as a 32x32 single-channel image."""
    def __init__(self, in_channel):
        super(CNN, self).__init__()
        self.in_channel = in_channel
        self.batch_norm_1 = nn.BatchNorm2d(1)
        self.conv_1 = nn.Conv2d(1, 8, 3)       # (B, 1, 32, 32) -> (B, 8, 30, 30)
        self.pooling_1 = nn.MaxPool2d(2)       # -> (B, 8, 15, 15)
        self.batch_norm_2 = nn.BatchNorm2d(8)
        self.conv_2 = nn.Conv2d(8, 16, 3)      # -> (B, 16, 13, 13)
        self.pooling_2 = nn.MaxPool2d(2)       # -> (B, 16, 6, 6)
        self.batch_norm_3 = nn.BatchNorm2d(16)
        self.conv_3 = nn.Conv2d(16, 32, 3)     # -> (B, 32, 4, 4)
        self.pooling_3 = nn.MaxPool2d(2)       # -> (B, 32, 2, 2)
        self.batch_norm_4 = nn.BatchNorm2d(32)
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(128, 1)        # 32 * 2 * 2 = 128
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.reshape(x, (-1, 1, 32, 32))  # requires in_channel == 1024
        x = self.batch_norm_1(x)
        x = self.conv_1(x)
        x = F.relu(self.pooling_1(x))
        x = self.batch_norm_2(x)
        x = self.conv_2(x)
        x = F.relu(self.pooling_2(x))
        x = self.batch_norm_3(x)
        x = self.conv_3(x)
        x = F.relu(self.pooling_3(x))
        x = self.batch_norm_4(x)
        x = self.dropout(self.flatten(x))
        x = self.sigmoid(self.linear(x))
        return x
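A quick shape sanity check I added (not part of the original run); it assumes the 1024-dim encoded input used below:

cnn = CNN(1024).double().eval()
dummy = torch.randn(4, 1024, dtype=torch.float64)   # a batch of 4 encoded samples
print(cnn(dummy).shape)   # torch.Size([4, 1]), one probability per sample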
#DNN idea from Mechanisms of Action prediction https://www.kaggle.com/nischaydnk/fork-of-blending-with-6-models-5old-1new#203-101-nonscored-pred-2layers.ipynb
class DNN(nn.Module):
    """One weight-normalized hidden layer with batch norm and dropout."""
    def __init__(self, in_channel, hidden_layer_size):
        super(DNN, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(in_channel)
        self.dropout1 = nn.Dropout(0.15)
        self.dense1 = nn.utils.weight_norm(nn.Linear(in_channel, hidden_layer_size))
        self.batch_norm3 = nn.BatchNorm1d(hidden_layer_size)
        self.dropout3 = nn.Dropout(0.1)
        self.dense3 = nn.utils.weight_norm(nn.Linear(hidden_layer_size, 1))
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = F.relu(self.dense1(x))
        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.sigmoid(self.dense3(x))
        return x
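The same sanity check for the DNN head (my addition):

dnn = DNN(1024, 512).double().eval()
print(dnn(torch.randn(4, 1024, dtype=torch.float64)).shape)   # torch.Size([4, 1])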
#Default AutoEncoder with 1 hidden (bottleneck) layer
class AutoEncoder(nn.Module):
    def __init__(self, in_channel, hidden_layer_size=500):
        super(AutoEncoder, self).__init__()
        self.batch_norm_1 = nn.BatchNorm1d(in_channel)                  # defined but unused in forward
        self.dense_1 = nn.Linear(in_channel, hidden_layer_size)         # encoder
        self.dense_2 = nn.Linear(hidden_layer_size, hidden_layer_size)  # defined but unused in forward
        self.dense_3 = nn.Linear(hidden_layer_size, in_channel)         # decoder
        self.dropout = nn.Dropout(0.2)                                  # defined but unused in forward

    def forward(self, x):
        x = F.relu(self.dense_1(x))
        x = F.relu(self.dense_3(x))   # note: the final ReLU clips negative values of the standardized input
        return x

    def encode(self, x):
        """Return the bottleneck representation"""
        x = F.relu(self.dense_1(x))
        return x
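A round-trip shape check I added; 1540 is the feature count printed by the training run below, used here as an assumption:

ae = AutoEncoder(1540, 1024).double()
x = torch.randn(4, 1540, dtype=torch.float64)
print(ae(x).shape, ae.encode(x).shape)   # torch.Size([4, 1540]) torch.Size([4, 1024])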
#Creating our own Data Provider (a custom torch Dataset)
class Data_Provider(Dataset):
def __init__(self, X, y):
super(Data_Provider, self).__init__()
self.X_ = X
self.y_ = y.to_numpy()
def __getitem__(self, index):
return self.X_[index, :], self.y_[index].flatten()
def __len__(self):
return len(self.X_)
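A minimal usage example with toy data (my addition):

toy_X = np.zeros((10, 1540))
toy_y = pd.Series(np.zeros(10))
loader = DataLoader(Data_Provider(toy_X, toy_y), batch_size=4)
X_batch, y_batch = next(iter(loader))
print(X_batch.shape, y_batch.shape)   # torch.Size([4, 1540]) torch.Size([4, 1])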
#Normalizer saver
def save_normalizer(normalizer, file_name='nn_dumped_normalizer.pkl'):
with open(file_name, 'wb') as fid:
pickle.dump(normalizer, fid)
#Normalizer loader
def load_normalizer(file_name='nn_dumped_normalizer.pkl'):
    """Return the pickled normalizer, or None if the file does not exist"""
    normalizer = None
    if os.path.exists(file_name):
        with open(file_name, 'rb') as fid:
            normalizer = pickle.load(fid)
    return normalizer
def get_data_for_nn(percent_of_nan=0.8):
    """get_data_wo_nan plus feature normalization; the fitted scaler is cached on disk and reused"""
    result_dataset, labels, result_dataset_test, kaggle_test_idx = get_data_wo_nan(percent_of_nan=percent_of_nan)
    normalizer = load_normalizer()
    if normalizer:
        result_dataset = normalizer.transform(result_dataset)
        result_dataset_test = normalizer.transform(result_dataset_test)
    else:
        #first run: fit the scaler on the train features and cache it for later calls
        normalizer = StandardScaler()
        result_dataset = normalizer.fit_transform(result_dataset)
        result_dataset_test = normalizer.transform(result_dataset_test)
        save_normalizer(normalizer)
    return result_dataset, labels, result_dataset_test, kaggle_test_idx
def get_train_test_loaders(X, y, batch_size, test_size=0.2, random_state=42):
    """Create train and test loaders"""
    #use the function arguments rather than hard-coded split values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    dataset_train = Data_Provider(X_train, y_train)
    dataset_test = Data_Provider(X_test, y_test)
    train_loader = DataLoader(
        dataset=dataset_train,
        batch_size=batch_size,
        shuffle=True,   # shuffle training batches
        num_workers=4
    )
    test_loader = DataLoader(
        dataset=dataset_test,
        batch_size=batch_size,
        num_workers=4
    )
    return train_loader, test_loader
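For illustration, a toy invocation (my addition; the array shapes mirror the real dataset):

X_toy = np.random.rand(100, 1540)
y_toy = pd.Series(np.random.randint(0, 2, 100).astype(float))
train_loader_toy, test_loader_toy = get_train_test_loaders(X_toy, y_toy, batch_size=32)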
#Train encoder function
def train_autoencoder(lr=1e-4, batch_size=500, epochs=300):
    """Main training loop for the autoencoder"""
    result_dataset, y, result_dataset_test, test_idx = get_data_for_nn()
    model = AutoEncoder(result_dataset.shape[1], 1024).to(DEVICE)
if torch.cuda.device_count() > 1:
print("Let's use", torch.cuda.device_count(), "GPUs!")
model = torch.nn.DataParallel(model)
# file_name = "model_min_loss.pkl"
# if os.path.exists(file_name):
# model.load_state_dict(torch.load(file_name))
# print("-------------------Get " + file_name + "-------------------")
model.double()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = ReduceLROnPlateau(optimizer, mode="min", threshold=1e-7,factor=0.9, patience=50, verbose=True)
train_loader, test_loader = get_train_test_loaders(result_dataset, y, batch_size)
    prev_test_loss = 10000
    #Train loop
    for epoch in range(epochs):
        loss_train_ = 0.0
        model.train()
        for X, _ in train_loader:
            X = X.to(DEVICE)
            optimizer.zero_grad()   # reset gradients accumulated from the previous batch
            result = model(X)
            loss_train = F.mse_loss(result, X)
            loss_train_ += loss_train.item()
            loss_train.backward()
            optimizer.step()
        scheduler.step(loss_train_)
        model.eval()
        #Test
        with torch.no_grad():
            test_loss = 0
            for idx, (X_test, _) in enumerate(test_loader):
                X_test = X_test.to(DEVICE)
                result_test = model(X_test)
                test_loss += F.mse_loss(result_test, X_test).item()
print(f'Epoch:{epoch + 1}, Train Loss:{loss_train_:.6f}, Test Loss:{test_loss:.6f}.')
if prev_test_loss > test_loss:
prev_test_loss = test_loss
torch.save(model.state_dict(), 'autoencoder/model_min_loss.pkl')
if epoch % 100 == 0:
torch.save(model.state_dict(), 'autoencoder/model_newest_' + str(epoch) + '.pkl')
def train_nn(lr=1e-4, batch_size=500, epochs=300, cnn=True, path_to_model_save=""):
    """Train the CNN or the DNN on top of the frozen autoencoder features"""
    result_dataset, y, _, _ = get_data_for_nn()
if cnn:
print("We load CNN")
model = CNN(1024).to(DEVICE)
else:
print("We load DNN")
model = DNN(1024, 512).to(DEVICE)
#Load encoder
encoder = AutoEncoder(result_dataset.shape[1], 1024).to(DEVICE)
encoder.load_state_dict(torch.load("../autoencoder/model_min_loss.pkl"))
encoder.eval()
encoder.double()
# file_name = "model_min_loss.pkl"
# if os.path.exists(file_name):
# model.load_state_dict(torch.load(file_name))
# print("-------------------Get " + file_name + "-------------------")
model.double()
#Load scheduler and optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = ReduceLROnPlateau(optimizer, threshold=1e-7, mode="min", factor=0.9, patience=15, verbose=True)
train_loader, test_loader = get_train_test_loaders(result_dataset, y, batch_size)
loss_file_train = open('loss_log_train.txt', 'w+')
loss_file_test = open('loss_log_test.txt', 'w+')
prev_test_loss = 10000
    #Train loop
    for epoch in range(epochs):
        loss_train_ = 0.0
        model.train()
        for X, y in train_loader:
            X = X.to(DEVICE)
            y = y.to(DEVICE).double()
            with torch.no_grad():   # the encoder is frozen, so no gradients are needed through it
                X = encoder.encode(X)
            optimizer.zero_grad()   # reset gradients accumulated from the previous batch
            result = model(X)
            loss_train = F.binary_cross_entropy(result, y)
            loss_train_ += loss_train.item()
            loss_train.backward()
            optimizer.step()
        scheduler.step(loss_train_)
model.eval()
        #Test our Neural Network
        with torch.no_grad():
            test_loss = 0
            for idx, (X_test, y_test) in enumerate(test_loader):
                X_test, y_test = X_test.to(DEVICE), y_test.to(DEVICE)
                X_test = encoder.encode(X_test)
                result_test = model(X_test)
                test_loss += F.binary_cross_entropy(result_test, y_test.double()).item()
#Print metrics
print(f'Epoch:{epoch + 1}, Train Loss:{loss_train_:.5f}, Test Loss:{test_loss:.5f}')
loss_file_train.write('{},'.format(loss_train_))
loss_file_test.write('{},'.format(test_loss))
#Write minloss
if prev_test_loss > test_loss:
prev_test_loss = test_loss
torch.save(model.state_dict(), f'{path_to_model_save}model_min_loss.pkl')
#Save model each 50 epochs
if epoch % 50 == 0:
torch.save(model.state_dict(), f'model_newest_{epoch}.pkl')
    loss_file_train.close()
    loss_file_test.close()
    #Reload the best checkpoint before returning
    model.load_state_dict(torch.load(f'{path_to_model_save}model_min_loss.pkl'))
    return model
def write_for_kaggle(model, file_name="NN_Answer.csv"):
    """Write model kaggle test result in csv"""
    print("---------Evaluating AUC ROC---------")
    result_dataset, y, result_dataset_test, test_idx = get_data_for_nn()
    encoder = AutoEncoder(result_dataset.shape[1], 1024).to(DEVICE)
    encoder.load_state_dict(torch.load("../autoencoder/model_min_loss.pkl"))
    encoder.eval()
    encoder.double()
    model.eval()
    with torch.no_grad():
        #the encoder and model run in double precision, so cast the inputs accordingly
        input_data = torch.tensor(result_dataset_test, dtype=torch.float64).to(DEVICE)
        input_data = encoder.encode(input_data)
        kaggle_result = model(input_data)
    kaggle_result = kaggle_result.cpu().numpy().flatten()
    kaggle_write(kaggle_result, test_idx, file_name)
if torch.cuda.is_available():
DEVICE = torch.device("cuda")
print("\n--------------We use GPU!--------------\n")
else:
DEVICE = torch.device("cpu")
# model = train_nn(lr=0.1e-4, batch_size=1000, epochs=1000,cnn=False,path_to_model_save='../dnn/')
# write_for_kaggle(model, file_name='DNN_Answer.csv')
model = train_nn(lr=0.1e-4, batch_size=1000, epochs=1000, cnn=True, path_to_model_save='../cnn/')
write_for_kaggle(model, file_name='CNN_Answer.csv')
--------------We use GPU!--------------
/home/nelubin/anaconda3/envs/ExpaSoft/lib/python3.6/site-packages/pandas/core/frame.py:4389: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
(194, 1540) DataSet has 1540 features
We load CNN
Epoch:1, Train Loss:0.72639, Test Loss:0.69279
Epoch:2, Train Loss:0.73027, Test Loss:0.68671
Epoch:3, Train Loss:0.73454, Test Loss:0.68204
Epoch:4, Train Loss:0.72583, Test Loss:0.67828
Epoch:5, Train Loss:0.73427, Test Loss:0.67526
Epoch:6, Train Loss:0.73240, Test Loss:0.67280
Epoch:7, Train Loss:0.71692, Test Loss:0.67066
Epoch:8, Train Loss:0.72847, Test Loss:0.66890
Epoch:9, Train Loss:0.72784, Test Loss:0.66744
Epoch:10, Train Loss:0.72710, Test Loss:0.66625
Epoch:11, Train Loss:0.73906, Test Loss:0.66531
Epoch:12, Train Loss:0.72140, Test Loss:0.66452
Epoch:13, Train Loss:0.72024, Test Loss:0.66362
Epoch:14, Train Loss:0.72547, Test Loss:0.66271
KeyboardInterrupt: the CNN training run was stopped manually while the DataLoader was spawning workers (full traceback omitted).
Model_Name: ROC AUC
1) XGB: 0.88201
2) RandomForest Classifier: 0.87439
3) Dense Neural Network (DNN): ~0.84
4) CNN (2D): ~0.82
XGBoost achieved the best result, although I expected the CNN to come out on top. The CNN's weaker score may be due to a misinterpretation of the data (stemming from a misunderstanding of the underlying business process).
What can be improved:
1) Build a 1D CNN (see the sketch after this list)
2) Try another autoencoder architecture
3) Try stacking models.
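For point 1, a minimal sketch of what a 1D CNN over the 1024-dim encoded vector could look like. This is a hypothetical, untuned architecture of my own, not something trained in this notebook; since its input and output shapes match the 2D CNN, it would drop into train_nn unchanged:

class CNN1D(nn.Module):
    """Hypothetical 1D CNN over the 1024-dim encoded features."""
    def __init__(self, in_channel=1024):
        super(CNN1D, self).__init__()
        self.conv_1 = nn.Conv1d(1, 8, 5, padding=2)    # length-preserving convolutions
        self.conv_2 = nn.Conv1d(8, 16, 5, padding=2)
        self.pool = nn.MaxPool1d(4)
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(16 * (in_channel // 16), 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = x.unsqueeze(1)                       # (B, 1024) -> (B, 1, 1024)
        x = self.pool(F.relu(self.conv_1(x)))    # -> (B, 8, 256)
        x = self.pool(F.relu(self.conv_2(x)))    # -> (B, 16, 64)
        x = self.flatten(x)                      # -> (B, 1024)
        return self.sigmoid(self.linear(x))      # -> (B, 1)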